home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-02
/
lysrc.zip
/
LEXTABLE.PAS
< prev
next >
Wrap
Pascal/Delphi Source File
|
1993-01-24
|
14KB
|
452 lines
unit LexTables;
(* 2-5-91 AG
5-13-92 AG (bug fix in merge_trans) *)
(* Copyright (c) 1990,91 by Albert Graef, Schillerstr. 18,
6509 Schornsheim/Germany
All rights reserved *)
interface
uses LexBase;
(* This module collects the various tables used by the Lex program:
- the symbol table
- the position table
- the DFA states and transition tables
Note: All tables are allocated dynamically (at initialization time)
because of the 64KB static data limit. *)
var max_bytes : LongInt;
(* available memory *)
function n_bytes : LongInt;
(* memory actually used *)
const
(* Maximum table sizes: *)
max_keys = 997; (* size of hash symbol table (prime number!) *)
max_pos = 600; (* maximum number of positions *)
max_states = 300; (* number of DFA states *)
max_trans = 600; (* number of transitions *)
max_start_states = 50; (* maximum number of user-defined start states *)
var
(* Actual table sizes: *)
n_pos : Integer;
n_states : Integer;
n_trans : Integer;
n_start_states : Integer;
type
(* Table data structures: *)
SymTable = array [1..max_keys] of record
pname : StrPtr;
(* print name; empty entries are denoted by pname=nil *)
case sym_type : ( none, macro_sym, start_state_sym ) of
macro_sym : ( subst : StrPtr );
(* macro substitution *)
start_state_sym : ( start_state : Integer );
(* start state *)
end;
PosTableEntry = record
follow_pos : IntSetPtr;
(* set of follow positions *)
case pos_type : ( char_pos, cclass_pos, mark_pos ) of
char_pos : ( c : Char );
(* character position *)
cclass_pos : ( cc : CClassPtr );
(* character class position *)
mark_pos : ( rule, pos : Integer );
(* mark position *)
end;
PosTable = array [1..max_pos] of PosTableEntry;
FirstPosTable = array [0..2*max_start_states+1] of IntSetPtr;
(* first positions for start states (even states
are entered anywhere on the line, odd states only
at the beginning of the line; states 0 and 1 denote
default, states 2..2*n_start_states+1 user-defined
start states) *)
StateTableEntry = record
state_pos : IntSetPtr;
(* positions covered by state *)
final : Boolean;
(* final state? *)
trans_lo,
trans_hi : Integer;
(* transitions *)
end;
StateTable = array [0..max_states-1] of StateTableEntry;
TransTableEntry = record
cc : CClassPtr;
(* characters of transition *)
follow_pos : IntSetPtr;
(* follow positions (positions of next state) *)
next_state : Integer;
(* next state *)
end;
TransTable = array [1..max_trans] of TransTableEntry;
var
verbose : Boolean; (* status of the verbose option *)
optimize : Boolean; (* status of the optimization option *)
sym_table : ^SymTable; (* symbol table *)
pos_table : ^PosTable; (* position table *)
first_pos_table : ^FirstPosTable; (* first positions table *)
state_table : ^StateTable; (* DFA state table *)
trans_table : ^TransTable; (* DFA transition table *)
(* Operations: *)
(* Hash symbol table:
The following routines are supplied to be used with the generic hash table
routines in LexBase. *)
function lookup(k : Integer) : String;
(* print name of symbol no. k *)
procedure entry(k : Integer; symbol : String);
(* enter symbol into table *)
(* Routines to build the position table: *)
procedure addCharPos(c : Char);
procedure addCClassPos(cc : CClassPtr);
procedure addMarkPos(rule, pos : Integer);
(* Positions are allocated in the order of calls to addCharPos, addCClassPos
and addMarkPos, starting at position 1. These routines also initialize
the corresponding follow sets. *)
(* Routines to build the state table: *)
var act_state : Integer; (* state currently considered *)
function newState(POS : IntSetPtr) : Integer;
(* Add a new state with the given position set; initialize the state's
position set to POS (the offsets into the transition table are
initialized when the state becomes active, see startStateTrans, below).
Returns: the new state number *)
function addState(POS : IntSetPtr) : Integer;
(* add a new state, but only if there is not already a state with the
same position set *)
procedure startStateTrans;
procedure endStateTrans;
(* initializes act_state's first and last offsets into the transition
table *)
function n_state_trans(i : Integer) : Integer;
(* return number of transitions in state i *)
procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
(* adds a transition to the table *)
procedure mergeTrans;
(* sorts transitions w.r.t. next states and merges transitions for the
same next state in the active state *)
procedure sortTrans;
(* sort transitions in act_state lexicographically *)
implementation
uses LexMsgs;
function n_bytes : LongInt;
begin
n_bytes := max_bytes-memAvail
end(*n_bytes*);
(* Hash table routines: *)
function lookup(k : Integer) : String;
begin
with sym_table^[k] do
if pname=nil then
lookup := ''
else
lookup := copy(pname^, 1, length(pname^))
end(*lookup*);
procedure entry(k : Integer; symbol : String);
begin
with sym_table^[k] do
begin
pname := newStr(symbol);
sym_type := none;
end
end(*entry*);
(* Routines to build the position table: *)
procedure addCharPos(c : Char);
begin
inc(n_pos);
if n_pos>max_pos then fatal(pos_table_overflow);
pos_table^[n_pos].follow_pos := newIntSet;
pos_table^[n_pos].pos_type := char_pos;
pos_table^[n_pos].c := c;
end(*addCharPos*);
procedure addCClassPos(cc : CClassPtr);
begin
inc(n_pos);
if n_pos>max_pos then fatal(pos_table_overflow);
pos_table^[n_pos].follow_pos := newIntSet;
pos_table^[n_pos].pos_type := cclass_pos;
pos_table^[n_pos].cc := cc;
end(*addCClassPos*);
procedure addMarkPos(rule, pos : Integer);
begin
inc(n_pos);
if n_pos>max_pos then fatal(pos_table_overflow);
pos_table^[n_pos].follow_pos := newIntSet;
pos_table^[n_pos].pos_type := mark_pos;
pos_table^[n_pos].rule := rule;
pos_table^[n_pos].pos := pos;
end(*addMarkPos*);
(* Routines to build the state table: *)
function newState(POS : IntSetPtr) : Integer;
begin
if n_states>=max_states then fatal(state_table_overflow);
newState := n_states;
with state_table^[n_states] do
begin
state_pos := POS;
final := false;
end;
inc(n_states);
end(*newState*);
function addState(POS : IntSetPtr) : Integer;
var i : Integer;
begin
for i := 0 to pred(n_states) do
if equal(POS^, state_table^[i].state_pos^) then
begin
addState := i;
exit;
end;
addState := newState(POS);
end(*addState*);
procedure startStateTrans;
begin
state_table^[act_state].trans_lo := succ(n_trans);
end(*startStateTrans*);
procedure endStateTrans;
begin
state_table^[act_state].trans_hi := n_trans;
end(*endStateTrans*);
function n_state_trans(i : Integer) : Integer;
begin
with state_table^[i] do
n_state_trans := trans_hi-trans_lo+1
end(*n_state_trans*);
(* Construction of the transition table:
This implementation here uses a simple optimization which tries to avoid
the construction of different transitions for each individual character
in large character classes by MERGING transitions whenever possible. The
transitions, at any time, will be partitioned into transitions on disjoint
character classes. When adding a new transition on character class cc, we
repartition the transitions as follows:
1. If the current character class cc equals an existing one, we can
simply add the new follow set to the existing one.
2. Otherwise, for some existing transition on some character class
cc1 with cc*cc1<>[], we replace the existing transition by a new
transition on cc*cc1 with follow set = cc1's follow set + cc's follow
set, and, if necessary (i.e. if cc1-cc is nonempty), a transition on
cc1-cc with follow set = cc1's follow set. We then remove the elements
of cc1 from cc, and proceed again with step 1.
We may stop this process as soon as cc becomes empty (then all characters
in cc have been distributed among the existing partitions). If cc does
NOT become empty, we have to construct a new transition for the remaining
character class (which then will be disjoint from all other character
classes in the transition table). *)
procedure addTrans(cc : CClass; FOLLOW : IntSetPtr);
var
i : Integer;
cc0, cc1, cc2 : CClass;
begin
for i := state_table^[act_state].trans_lo to n_trans do
if trans_table^[i].cc^=cc then
begin
setunion(trans_table^[i].follow_pos^, FOLLOW^);
exit
end
else
begin
cc0 := cc*trans_table^[i].cc^;
if cc0<>[] then
begin
cc1 := trans_table^[i].cc^-cc;
cc2 := cc-trans_table^[i].cc^;
if cc1<>[] then
begin
trans_table^[i].cc^ := cc1;
inc(n_trans);
if n_trans>max_trans then fatal(trans_table_overflow);
trans_table^[n_trans].cc := newCClass(cc0);
trans_table^[n_trans].follow_pos := newIntSet;
trans_table^[n_trans].follow_pos^ :=
trans_table^[i].follow_pos^;
setunion(trans_table^[n_trans].follow_pos^, FOLLOW^);
end
else
begin
trans_table^[i].cc^ := cc0;
setunion(trans_table^[i].follow_pos^, FOLLOW^);
end;
cc := cc2;
if cc=[] then exit;
end
end;
inc(n_trans);
if n_trans>max_trans then fatal(trans_table_overflow);
trans_table^[n_trans].cc := newCClass(cc);
trans_table^[n_trans].follow_pos := newIntSet;
trans_table^[n_trans].follow_pos^ := FOLLOW^;
end(*addCharTrans*);
(* comparison and swap procedures for sorting transitions: *)
{$F+}
function transLessNextState(i, j : Integer) : Boolean;
{$F-}
(* compare transitions based on next states (used in mergeCharTrans) *)
begin
transLessNextState := trans_table^[i].next_state<
trans_table^[j].next_state
end(*transLessNextState*);
{$F+}
function transLess(i, j : Integer) : Boolean;
{$F-}
(* lexical order on transitions *)
var c : Char; xi, xj : Boolean;
begin
for c := #0 to #255 do
begin
xi := c in trans_table^[i].cc^;
xj := c in trans_table^[j].cc^;
if xi<>xj then
begin
transLess := xi>xj;
exit
end;
end;
transLess := false
end(*transLess*);
{$F+}
procedure transSwap(i, j : Integer);
{$F-}
(* swap transitions i and j *)
var x : TransTableEntry;
begin
x := trans_table^[i];
trans_table^[i] := trans_table^[j];
trans_table^[j] := x;
end(*transSwap*);
procedure mergeTrans;
var
i, j, n_deleted : Integer;
begin
(* sort transitions w.r.t. next states: *)
quicksort(state_table^[act_state].trans_lo,
n_trans,
transLessNextState,
transSwap);
(* merge transitions for the same next state: *)
n_deleted := 0;
for i := state_table^[act_state].trans_lo to n_trans do
if trans_table^[i].cc<>nil then
begin
j := succ(i);
while (j<=n_trans) and
(trans_table^[i].next_state =
trans_table^[j].next_state) do
begin
(* merge cclasses of transitions i and j, then mark
transition j as deleted *)
trans_table^[i].cc^ := trans_table^[i].cc^+
trans_table^[j].cc^;
trans_table^[j].cc := nil;
inc(n_deleted);
inc(j);
end;
end;
(* remove deleted transitions: *)
j := state_table^[act_state].trans_lo;
for i := state_table^[act_state].trans_lo to n_trans do
if trans_table^[i].cc<>nil then
if i<>j then
begin
trans_table^[j] := trans_table^[i];
inc(j);
end
else
inc(j);
(* update transition count: *)
dec(n_trans, n_deleted);
end(*mergeTrans*);
procedure sortTrans;
begin
quicksort(state_table^[act_state].trans_lo,
n_trans,
transLess,
transSwap);
end(*sortTrans*);
var i : Integer;
begin
verbose := false;
optimize := false;
max_bytes := memAvail;
n_pos := 0;
n_states := 0;
n_trans := 0;
n_start_states := 0;
(* allocate tables: *)
new(sym_table);
new(pos_table);
new(first_pos_table);
new(state_table);
new(trans_table);
(* initialize symbol table: *)
for i := 1 to max_keys do sym_table^[i].pname := nil;
end(*LexTables*).